/***********************************************************************
* Title: Workhorses of Opportunity, Howard and Weinstein
*
* Inputs:
*   - justnormasylum.dta: county-level indicator for former normal schools
*   - county_outcomes_dta.dta: Chetty county-level mobility and social outcomes
*
* Outputs:
*   - distributional_graphs/[outcome]_with_average.pdf: 
*     Graphs of normal county effects by parental income percentile, 
*     overlaid with untreated county averages
***********************************************************************/

*---------------------------------------------------------------*
* 0. Load and prep normal/asylum county indicator data
*---------------------------------------------------------------*

clear
use "justnormasylum"
rename hasnormalschool hasnormal
keep hasnormal cty_fips 
tempfile normal
save `normal'

*---------------------------------------------------------------*
* 1. Load main county-level outcome data and merge
*---------------------------------------------------------------*

clear
clear matrix
clear mata
set maxvar 15000

use county_outcomes_dta
gen cty_fips = state*1000 + county
rename state statefips
rename county countychetty
merge 1:m cty_fips using `normal', keep(3)

*---------------------------------------------------------------*
* 2. Replicate teen birth to pooled and reshape to long form
*---------------------------------------------------------------*

foreach percentile in p1 p25 p50 p75 p100 {
	gen teenbrth_pooled_pooled_`percentile' = teenbrth_pooled_female_`percentile'
}

keep has_dad_pooled_pooled_* has_mom_pooled_pooled_* coll_pooled_pooled_* comcoll_pooled_pooled_* somecoll_pooled_pooled_* ///
     hs_pooled_pooled_* married_pooled_pooled_* teenbrth_pooled_pooled_* jail_pooled_pooled_* staycz_pooled_pooled_* ///
     stayhome_pooled_pooled_* kfr_pooled_pooled_* working_pooled_pooled_* kir_pooled_pooled_* cty_fips hasnormal statefips 
drop *_mean *_n

reshape long has_dad_pooled_pooled_ has_mom_pooled_pooled_ coll_pooled_pooled_ comcoll_pooled_pooled_ somecoll_pooled_pooled_ ///
    hs_pooled_pooled_ married_pooled_pooled_ teenbrth_pooled_pooled_ jail_pooled_pooled_ staycz_pooled_pooled_ ///
    stayhome_pooled_pooled_ kfr_pooled_pooled_ working_pooled_pooled_ kir_pooled_pooled_, i(cty_fips) j(percentile) string

*---------------------------------------------------------------*
* 3. Principal component of pooled outcomes and reshape wide
*---------------------------------------------------------------*

pca coll_pooled_pooled_ comcoll_pooled_pooled_ somecoll_pooled_pooled_ hs_pooled_pooled_ married_pooled_pooled_ ///
    teenbrth_pooled_pooled_ jail_pooled_pooled_ staycz_pooled_pooled_ stayhome_pooled_pooled_ kfr_pooled_pooled_ ///
    working_pooled_pooled_ kir_pooled_pooled_
predict pr_comp_pooled_pooled_

reshape wide has_dad_pooled_pooled_ has_mom_pooled_pooled_ coll_pooled_pooled_ comcoll_pooled_pooled_ somecoll_pooled_pooled_ ///
    hs_pooled_pooled_ married_pooled_pooled_ teenbrth_pooled_pooled jail_pooled_pooled_ staycz_pooled_pooled_ ///
    stayhome_pooled_pooled_ kfr_pooled_pooled_ working_pooled_pooled_ kir_pooled_pooled_ pr_comp_pooled_pooled, i(cty_fips) j(percentile) string

*---------------------------------------------------------------*
* 4. Expand to generate one row per percentile per county
*---------------------------------------------------------------*

gen obsnum = _n
expand 5
bys obsnum: gen obscount = _n

gen percentile = "p1"     if obscount == 1
replace percentile = "p25"  if obscount == 2
replace percentile = "p50"  if obscount == 3
replace percentile = "p75"  if obscount == 4
replace percentile = "p100" if obscount == 5

encode percentile, gen(percentilecode)
gen percentile_num = (obscount * 25 - 25 + (obscount == 1)) / 100 - 0.5

*---------------------------------------------------------------*
* 5. Run regressions and store coefficient estimates
*---------------------------------------------------------------*

gen outcome = .

foreach var in has_dad has_mom hs coll comcoll somecoll married teenbrth jail staycz stayhome working kfr kir pr_comp {
	
	foreach pct in p1 p25 p50 p75 p100 {
		replace outcome = `var'_pooled_pooled_`pct' if percentile == "`pct'"
	}
	replace outcome = outcome * 100 if inlist("`var'", "kfr", "kir")
	
	qui reghdfe outcome i.percentilecode#1.hasnormal, absorb(statefips#percentilecode) cluster(statefips) nocon
	test 1.percentilecode#1.hasnormal = 2.percentilecode#1.hasnormal = 3.percentilecode#1.hasnormal = 4.percentilecode#1.hasnormal = 5.percentilecode#1.hasnormal
	est store `var'

	*-----------------------------------------------------------*
	* 6. Save regression output with outcome averages by income
	*-----------------------------------------------------------*

	preserve
		parmest, fast
		gen obscount = substr(parm,1,1)
		gen percentile = .
		replace percentile = 1    if obscount == "1"
		replace percentile = 25   if obscount == "3"
		replace percentile = 50   if obscount == "4"
		replace percentile = 75   if obscount == "5"
		replace percentile = 100  if obscount == "2"
		tempfile results
		save `results', replace
	restore

	preserve
		drop if hasnormal
		gen pct2 = 1   if percentile == "p1"
		replace pct2 = 25  if percentile == "p25"
		replace pct2 = 50  if percentile == "p50"
		replace pct2 = 75  if percentile == "p75"
		replace pct2 = 100 if percentile == "p100"
		collapse outcome, by(pct2)
		rename pct2 percentile
		merge 1:1 percentile using `results'
		
		gen min90 = -invttail(dof, .05) * stderr + estimate
		gen max90 =  invttail(dof, .05) * stderr + estimate

		twoway rspike min95 max95 percentile || ///
		       rcap min90 max90 percentile, color(dkgreen) || ///
		       scatter estimate percentile, mcolor(dkgreen) yline(0, lpattern(dash) lcolor(gs8)) ///
		       ytitle("Effect of Normal County (Spikes)", size(medlarge)) ///
		       xtitle("Parents' Income Percentile", size(large)) legend(off) xlabel(0 25 50 75 100) || ///
		       line outcome percentile, lcolor(orange_red) lpattern(dash) yaxis(2) ///
		       ytitle("Avg. Value in Non-Normal County (Dashed)", axis(2) size(medlarge)) ///
		       name(`var'_average, replace)

		graph export "`var'_with_average.pdf", as(pdf) replace
	restore
}
